To run this R Markdown file, enter the following lines in R:
library(rmarkdown)
library(knitr)
Age of the dataset.
age <- "GW14"
Choose an upper limit for nGenes and nUMIs for zoomed-in plots.
nGene.limit <- 3500
nUMI.limit <- 10000
Indicate the path to your list of ribosomal gene names.
ribogenes <- read_table("/kriegsteinlab/data1/carmen/2nd-trimester/qc/ribogenes.txt", col_names = F)$X1
Path to your dataset (in this case, an .RData workspace file), and the name of the Seurat object contained within the workspace. We load the data before running the .Rmd file as this takes a long time, and is otherwise done everytime the script is run (immune to caching.)
dataset <- "/kriegsteinlab/data1/aparna/homefiles/gw14.RData"
load(dataset)
seurat.obj <- "gw14"
Indicate the path to a metadata file with cell names and their brain regions of origin.
meta <- read_tsv("/kriegsteinlab/data1/carmen/2nd-trimester/metadata/colnames_GW14_080818_fixed.txt",
col_names = c("cell.name", "brain.region"))
Call the knitr::render function indicating the path to this .Rmd file:
render("/kriegsteinlab/data1/carmen/2nd-trimester/qc/code/QC_10X_plots_2018-08-30.Rmd")
knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE, cache.lazy = FALSE)
library(Seurat)
library(readr)
library(ggplot2)
library(scales)
library(viridis)
library(tidyr)
library(dplyr)
library(janitor)
For compatibility issues with the new version of Seurat.
assign("seurat.obj.new", CreateSeuratObject(raw.data = get(seurat.obj)@raw.data, min.cells = 0, min.genes = 0, project=age))
rm(list=seurat.obj)
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
@ident and the @meta.data slots.seurat.obj.new@meta.data$orig.ident <- factor(meta$brain.region)
seurat.obj.new <- SetIdent(seurat.obj.new, cells.use = WhichCells(seurat.obj.new), ident.use = meta$brain.region)
summary(seurat.obj.new@meta.data)
## nGene nUMI orig.ident
## Min. : 41.0 Min. : 191 motor :20053
## 1st Qu.: 369.0 1st Qu.: 580 occipital :14200
## Median : 507.0 Median : 855 MGE :13241
## Mean : 579.9 Mean : 1056 hypothalamus:13141
## 3rd Qu.: 684.0 3rd Qu.: 1253 LGE :12771
## Max. :8426.0 Max. :60900 CGE :11483
## (Other) :13990
seurat.obj.new@meta.data %>% tabyl(orig.ident) %>% adorn_totals(where="row") %>% arrange(desc(n))
(Not working currently.)
violin.plot <- VlnPlot(seurat.obj.new, features.plot = c("nGene", "nUMI"), do.sort = T, point.size.use = .5, ident.include = as.character(statsByArea$orig.ident))
mito.genes <- grep(pattern="^MT-", x=rownames(seurat.obj.new@data), value = TRUE)
percent.mito <- colSums(seurat.obj.new@raw.data[mito.genes, ])/colSums(seurat.obj.new@raw.data)
seurat.obj.new <- AddMetaData(object = seurat.obj.new, metadata = percent.mito, col.name = "percent.mito")
ribo <- seurat.obj.new@raw.data[ribogenes, ]
percent.ribo <- colSums(ribo)/colSums(seurat.obj.new@raw.data)
seurat.obj.new <- AddMetaData(object = seurat.obj.new, metadata = percent.ribo, col.name = "percent.ribo")
statsByArea <- seurat.obj.new@meta.data %>% dplyr::group_by(orig.ident) %>%
summarise(mean.nGene = mean(nGene), mean.nUMI =mean(nUMI), mean.pctmito = mean(percent.mito), mean.pctribo= mean(percent.ribo))
statsByArea
seurat.obj.new@meta.data$orig.ident <- ordered(seurat.obj.new@meta.data$orig.ident,
levels = (statsByArea %>% arrange(mean.nGene))$orig.ident)
p1 <- ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, nGene, colour=orig.ident), alpha=0.2, size=0.25) +
geom_violin(aes(orig.ident, nGene), colour="grey60", scale = "width", alpha=0.3) +
scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(angle = 30)
) +
scale_y_continuous(breaks=seq(0, max(seurat.obj.new@meta.data$nGene), by=1000)) +
scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
labs(title=paste0(age, ": Genes per Cell"),
x="",
y="Genes per Cell"
) +
guides(fill=FALSE, colour=FALSE)
p1
p2 <- ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, nGene, colour=orig.ident), alpha=0.2, size=0.25) +
geom_violin(aes(orig.ident, nGene), colour="grey60", scale = "width", alpha=0.3) +
scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(angle = 30)
) +
scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10), limits = c(0, nGene.limit)) +
scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
labs(title=paste0(age, ": Genes per Cell (Zoom)"),
x="",
y="Genes per Cell"
) +
guides(fill=FALSE, colour=FALSE)
p2
p3 <-
ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, nUMI, colour=orig.ident), alpha=0.2, size=0.25) +
scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
geom_violin(aes(orig.ident, nUMI), colour="grey60", colour="grey30", alpha=0.3, scale="width") +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(angle = 30)
) +
scale_y_continuous(breaks=seq(0, max(seurat.obj.new@meta.data$nUMI), by=10000)) +
labs(title=paste0(age, ": UMIs per Cell"),
x="",
y="UMIs per Cell"
) +
guides(fill=FALSE, colour=FALSE)
p3
p4 <-
ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, nUMI, colour=orig.ident), alpha=0.2, size=0.25) +
scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
geom_violin(aes(orig.ident, nUMI), colour="grey60", colour="grey60", alpha=0.3, scale="width") +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(angle = 30)
) +
# ylim(0, 40000) +
scale_y_continuous(breaks=seq(0, nUMI.limit, by=nUMI.limit/10), limits=c(0, nUMI.limit)) +
labs(title=paste0(age, ": UMIs per Cell (Zoom)"),
x="",
y="UMIs per Cell"
) +
guides(fill=FALSE, colour=FALSE)
p4
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(nUMI)
p5 <- ggplot(seurat.obj.new@meta.data %>% filter(nGene < nGene.limit)) +
geom_jitter(aes(orig.ident, nGene, colour=log10(nUMI)), alpha=0.6, size=0.25) +
scale_color_viridis(option="magma", begin = .1, end = .95) +
geom_violin(aes(orig.ident, nGene), colour="grey60", colour="grey60", alpha=0.1, scale="width") +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
legend.position=c(0.9, 1),
legend.key.size = unit(0.5,"cm"),
legend.direction = "horizontal",
legend.text = element_text(angle = 30, size = 7),
legend.title = element_text(size = 6),
axis.text.x = element_text(angle = 30)
# legend.title.align = 0.9
) +
# ylim(0, 40000) +
scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10), limits=c(0, nGene.limit)) +
labs(title=paste0(age, ": Genes per Cell / log UMIs per Cell"),
x="",
y="Genes Detected",
color= "log10 UMI")
p5
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(nGene)
p6 <- ggplot(seurat.obj.new@meta.data %>% filter(nUMI < nUMI.limit)) +
geom_jitter(aes(orig.ident, nUMI, colour=nGene), alpha=0.75, size=0.25) +
scale_color_viridis(option="magma", begin = .1, end = .9) +
geom_violin(aes(orig.ident, nUMI), colour="grey60", alpha=0.1, scale="width") +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
legend.position=c(0.9,1),
legend.key.size = unit(0.5,"cm"),
legend.direction = "horizontal",
legend.text = element_text(angle = 30, size = 7),
legend.title = element_text(size = 12),
axis.text.x = element_text(angle = 30)
# legend.title.align = 0.9
) +
# ylim(0, 40000) +
scale_y_continuous(breaks=seq(0, nUMI.limit, by=nUMI.limit/10), limits=c(0, nUMI.limit)) +
labs(title=paste0(age, ": UMIs per Cell / Genes per Cell"),
x="",
y="UMIs Detected",
color= "")
p6
p7 <-
ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, percent.ribo, colour=orig.ident), alpha=0.2, size=0.25) +
scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
geom_violin(aes(orig.ident, percent.ribo), colour="grey60", alpha=0.3, scale="width") +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust=0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(angle = 30)
) +
# scale_y_continuous(breaks=seq(0,40000, by=5000)) +
# ylim(0,40000) +
labs(title=paste0(age, ": % Ribosomal Genes per Cell"),
x="",
y="% Ribosomal Genes"
) +
guides(fill=FALSE, colour=FALSE)
p7
p8 <-
ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, percent.mito, colour=orig.ident), alpha=0.2, size=0.25) +
scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
geom_violin(aes(orig.ident, percent.mito), colour="grey60", alpha=0.3, scale="width") +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust=0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(angle = 30)
) +
# scale_y_continuous(breaks=seq(0,0.1, by=0.01), limits = c(0, 0.1)) +
labs(title=paste0(age, ": % Mitochondrial Genes per Cell"),
x="",
y="% Mitochondrial Genes"
) +
guides(fill=FALSE, colour=FALSE)
p8
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.ribo)
ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, nGene, colour=percent.ribo), alpha=0.6, size=0.25) +
geom_violin(aes(orig.ident, nGene), colour="grey40", alpha=0.1, scale="width") +
scale_color_viridis(option="magma", begin = .15, end = .9) +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(angle = 30)
) +
scale_y_continuous(breaks=seq(0, max(seurat.obj.new@meta.data$nGene), by=1000)) +
scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
labs(title=paste0(age, ": Genes Detected per Cell"),
x="Area",
y="nGene"
)
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.ribo)
p10 <- ggplot(seurat.obj.new@meta.data %>% filter(nGene < nGene.limit)) +
geom_jitter(aes(orig.ident, nGene, colour=percent.ribo), alpha=0.8, size=0.25) +
geom_violin(aes(orig.ident, nGene), colour="grey40", alpha=0.1, scale="width") +
scale_color_viridis(option="magma", begin = .1, end = .95) +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
legend.position=c(0.85,1),
legend.key.size = unit(0.5,"cm"),
legend.direction = "horizontal",
legend.text = element_text(angle = 30, size = 7),
legend.title = element_text(size = 8),
axis.text.x = element_text(angle = 30)
# legend.title.align = 0.9
) +
scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10), limits = c(0, nGene.limit)) +
scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
labs(title=paste0(age, ": Genes per Cell / % Ribosomal Genes\n(zoom-in)"),
x="",
y="Genes per Cell",
color="% ribosomal"
)
p10
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.mito)
p11 <- ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, nGene, colour=percent.mito), alpha=0.7, size=0.25) +
geom_violin(aes(orig.ident, nGene), colour="grey60", alpha=0.1, scale="width") +
scale_color_viridis(option="magma", begin = .15, end = .9) +
theme_minimal() +
theme(plot.title = element_text(size= 12),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
axis.text.x = element_text(angle = 30)
) +
scale_y_continuous(breaks=seq(0, max(seurat.obj.new@meta.data$nGene), by=500)) +
scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
labs(title=paste0(age, ": Number of Genes Detected per Cell"),
x="Area",
y="nGene"
)
p11
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.mito)
p12 <- ggplot(seurat.obj.new@meta.data %>% filter(nGene < nGene.limit)) +
geom_jitter(aes(orig.ident, nGene, colour=percent.mito), alpha=0.8, size=0.25) +
geom_violin(aes(orig.ident, nGene), colour="grey60", alpha=0.1, scale="width") +
scale_color_viridis(option="magma", begin = .15, end = .95) +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
legend.position=c(0.85,1),
legend.key.size = unit(0.5,"cm"),
legend.direction = "horizontal",
legend.text = element_text(angle = 30, size = 7),
legend.title = element_text(size = 8),
# legend.title.align = 0.9
) +
scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10), limits = c(0, nGene.limit)) +
scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
labs(title=paste0(age, ": Genes per Cell / % Mitochondrial Genes"),
x="",
y="Genes per Cell",
color="% mito"
)
p12
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.mito)
p13 <-
ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, percent.ribo, colour=percent.mito), alpha=0.7, size=0.25) +
geom_violin(aes(orig.ident, percent.ribo), colour="grey60", alpha=0.1, scale="width") +
scale_color_viridis(option="magma", begin = .10, end = .90) +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
legend.position=c(0.87,.95),
legend.key.size = unit(0.5,"cm"),
legend.direction = "horizontal",
legend.text = element_text(angle = 30, size = 7),
legend.title = element_text(size = 8),
axis.text.x = element_text(angle = 30)
# legend.title.align = 0.9
) +
labs(title=paste0(age, ": % Ribosomal Genes / % Mitochondrial Genes per Cell"),
x="",
y="% Ribosomal Genes",
color="% mito"
)
p13
seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.ribo)
p14 <-
ggplot(seurat.obj.new@meta.data) +
geom_jitter(aes(orig.ident, percent.mito, colour=percent.ribo), alpha=0.7, size=0.20) +
geom_violin(aes(orig.ident, percent.mito), colour="grey60", alpha=0.1, scale="width") +
scale_color_viridis(option="magma", begin = .10, end = .95) +
theme_minimal() +
theme(plot.title = element_text(size= 12, hjust = 0.5),
axis.title.y=element_text(vjust = 2),
axis.title.x=element_text(vjust = -1),
legend.position=c(0.85,.93),
legend.key.size = unit(0.5,"cm"),
legend.direction = "horizontal",
legend.text = element_text(angle = 30, size = 7),
legend.title = element_text(size = 8),
axis.text.x = element_text(angle = 30)
# legend.title.align = 0.9
) +
labs(title=paste0(age, ": % Mitochondrial Genes / % Ribosomal Genes per Cell"),
x="",
y="% Mitochondrial Genes",
color = "% ribosomal"
)
p14
meta.from.seurat <- seurat.obj.new@meta.data
rm(seurat.obj.new, matrix_scaled, matrix_pca, ribo, cluster)
png(paste0(age, "_QC_all.png"), width=25, height=16, units="in", res=120)
multiplot(p1, p3, p7, p8, p2, p4, p10, p12, p5, p6, p13, p14, cols=3)
dev.off()
## png
## 2